# Packages this script depends on; they are attached in the listed order.
libs <- c("bench", "tidyverse", "yaml", "rvest")
for (lib in libs) {
  # library() stops with an error when a package is missing, whereas
  # require() would only return FALSE and let the script fail later on.
  library(lib, character.only = TRUE)
}
# Parse config.yaml (relative path, resolved against the working directory).
config <- read_yaml("config.yaml")

ggplot2を用いた可視化

# Empty base plot object: no data, no aesthetics, no layers yet.
g <- ggplot()

# Histogram of displ.
# No bin settings are given, so stat_bin() falls back to its default and
# reports:
#   `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot() +
  geom_histogram(aes(x = displ), data = mpg)

# Density curve of displ.
# Drawn with geom_line(stat = "density") because the author found the
# geom_density() rendering less clean.
mpg %>%
  ggplot() +
  geom_line(aes(x = displ), stat = "density")

色を分けたい場合には, 変数を因子型に変換して離散変数と見なされるように配慮する.

# Scatter of displ vs cty, coloured by cylinder count.
# cyl is converted to an ordered factor so the colour scale treats it as
# discrete. `ordered` is spelled out in full rather than relying on partial
# argument matching of `order`.
mpg %>%
  mutate(cyl_fct = factor(cyl, levels = 4:8, ordered = TRUE)) %>%
  ggplot(
    mapping = aes(x = displ, y = cty, group = cyl_fct, colour = cyl_fct)
  ) +
  geom_point()

# Because the points are grouped, a fitted line can be drawn per group too.
# `ordered` is spelled out in full rather than relying on partial argument
# matching of `order`.
mpg %>%
  mutate(cyl_fct = factor(cyl, levels = 4:8, ordered = TRUE)) %>%
  ggplot(
    mapping = aes(x = displ, y = cty, group = cyl_fct, colour = cyl_fct)
  ) +
  geom_point() +
  geom_smooth(method = "lm")

# Point and line appearance can be customised freely: chocolate-coloured
# markers drawn with shape 35 at size 10, plus a dashed linear fit without
# its confidence band.
mpg %>%
  ggplot(aes(x = displ, y = cty)) +
  geom_point(shape = 35, size = 10, colour = "chocolate") +
  geom_smooth(se = FALSE, linetype = "dashed", method = "lm")

annotate

データセットとしてではなく, 簡易的に(つまりベクトルで)データを追加したい場合にはannotateを使う.

# Coordinates of the extra points, supplied as plain vectors rather than as
# a data frame.
add_x <- c(2.5, 3, 3.5)
add_y <- c(25, 27.5, 30)

mpg %>%
  ggplot(aes(x = displ, y = cty)) +
  geom_point() +
  # Extra points, drawn in red on top of the mpg scatter.
  annotate("point", x = add_x, y = add_y, colour = "red") +
  # Two text labels placed at (5, 30) and (5, 25).
  annotate(
    "text",
    x = c(5, 5),
    y = c(30, 25),
    label = c("要チェック", "赤色のデータを追加")
  )

統計処理

x軸が離散変数の場合.

# Mean cty per vehicle class, computed up front with dplyr.
mean_cty <-
  mpg %>%
  group_by(class) %>%
  summarise(cty = mean(cty))

# Variant 1: draw the pre-computed means exactly as they are.
ggplot(mean_cty, aes(class, cty)) +
  geom_bar(stat = "identity")

# Variant 2: let the summary stat compute the per-class mean from the raw
# data. `fun` is the current name of the deprecated `fun.y` argument
# (ggplot2 >= 3.3.0).
ggplot(mpg, aes(class, cty)) +
  geom_bar(stat = "summary", fun = "mean")

要約統計量を知る. 集計に使う関数は自作のものでも大丈夫なようだ.

# 10th / 90th percentile helpers built by pre-filling quantile()'s `probs`.
# `probs` is written out in full rather than relying on partial matching
# of `prob`.
q10 <- partial(quantile, probs = .1)
q90 <- partial(quantile, probs = .9)

# First layer (default colour): mean with a min-max range.
# Second layer (red): median with a 10th-90th percentile range.
# `fun`, `fun.max` and `fun.min` are the current names of the deprecated
# `fun.y`, `fun.ymax` and `fun.ymin` arguments (ggplot2 >= 3.3.0).
mpg %>%
  ggplot(mapping = aes(x = class, y = cty)) +
  stat_summary(
    geom = "pointrange",
    fun = "mean", fun.max = "max", fun.min = "min"
  ) +
  stat_summary(
    geom = "pointrange",
    fun = "median", fun.max = "q90", fun.min = "q10",
    colour = "red"
  )

ヴァイオリンプロットは面積に情報を持たせることができる. このグラフを見ると, 点が重なってしまっていることがわかる.

# Violin plot of cty per class with scale = "count", overlaid with the raw
# points coloured by class (legend suppressed).
mpg %>%
  ggplot(aes(x = class, y = cty)) +
  geom_violin(scale = "count") +
  geom_point(aes(colour = class), show.legend = FALSE)

jitterで点の重なりを避ける.

# Bars show the per-class mean of cty; the raw observations are jittered
# horizontally only (height = 0), so their cty values are not displaced.
# `fun` is the current name of the deprecated `fun.y` argument
# (ggplot2 >= 3.3.0).
ggplot(data = mpg, mapping = aes(x = class, y = cty)) +
  stat_summary(geom = "bar", fun = "mean") +
  geom_jitter(
    mapping = aes(colour = class),
    width = .4, height = .0, show.legend = FALSE
  )

position

# Three variants of the same mean-bar plot, filled by year, that differ only
# in how the bars are positioned. `fun` is the current name of the deprecated
# `fun.y` argument (ggplot2 >= 3.3.0).

# Presumably the left panel: stat_summary() bars with no position given.
ggplot(data = mpg, mapping = aes(x = class, y = cty, fill = factor(year))) +
  stat_summary(geom = "bar", fun = "mean") +
  stat_summary(fun.data = "mean_se")

# Centre: position = "stack"
ggplot(data = mpg, mapping = aes(x = class, y = cty, fill = factor(year))) +
  geom_bar(stat = "summary", fun = "mean") +
  stat_summary(fun.data = "mean_se")

# Right panel: position = "dodge"
ggplot(data = mpg, mapping = aes(x = class, y = cty, fill = factor(year))) +
  stat_summary(geom = "bar", fun = "mean", position = position_dodge()) +
  stat_summary(fun.data = "mean_se", position = position_dodge(width = 0.9))

特定の範囲を拡大表示

coord_cartesianを使う.

# Full view: scatter of displ vs cty with reference lines at displ = 4 and
# cty = 15, plus a linear fit without its confidence band.
mpg %>%
  ggplot(aes(x = displ, y = cty)) +
  geom_point() +
  geom_vline(xintercept = 4) +
  geom_hline(yintercept = 15) +
  geom_smooth(se = FALSE, method = "lm")

# Right panel: the same plot zoomed in on a specific range of the x and y
# axes via coord_cartesian().
mpg %>%
  ggplot(aes(x = displ, y = cty)) +
  geom_point() +
  coord_cartesian(xlim = c(1.5, 4.5), ylim = c(10, 35)) +
  geom_vline(xintercept = 4) +
  geom_hline(yintercept = 15) +
  geom_smooth(se = FALSE, method = "lm")

他者と共有可能な状態に仕上げる

themeで頑張れる. themeをどのように使うのかは, theme_bw()の中身や結果を確かめながらやる.

# theme as a function: evaluating the bare name (no parentheses) prints the
# definition of theme_bw, shown in the output below.
theme_bw
## function (base_size = 11, base_family = "") 
## {
##     theme_grey(base_size = base_size, base_family = base_family) %+replace% 
##         theme(panel.background = element_rect(fill = "white", 
##             colour = NA), panel.border = element_rect(fill = NA, 
##             colour = "grey20"), panel.grid.major = element_line(colour = "grey92"), 
##             panel.grid.minor = element_line(colour = "grey92", 
##                 size = 0.25), strip.background = element_rect(fill = "grey85", 
##                 colour = "grey20"), legend.key = element_rect(fill = "white", 
##                 colour = NA), complete = TRUE)
## }
## <bytecode: 0x000000001cff6d98>
## <environment: namespace:ggplot2>

配色を自分で決める

# Box plot of cty per drv, with a hand-picked fill colour for each drv level.
mpg %>%
  ggplot(aes(x = drv, y = cty, fill = drv)) +
  geom_boxplot() +
  scale_fill_manual(
    values = c("4" = "black", "f" = "grey", "r" = "#ffffff")
  )

ラベルを変更する

# Scatter of displ vs cty, grouped and coloured by cylinder count. Title,
# subtitle, caption, axis titles and the legend title are all set in a single
# labs() call; theme_light() enlarges the base font size to 18.
mpg %>%
  ggplot(
    aes(x = displ, y = cty, group = factor(cyl), colour = factor(cyl))
  ) +
  geom_point() +
  labs(
    x = "エンジンの大きさ(L)",
    y = "市街地における燃費(mpg)",
    colour = "シリンダー数",
    title = "エンジンの大きさと市街地における燃費の関係",
    subtitle = "1999年と2008年のデータを用いて",
    caption = "出典:xxx"
  ) +
  theme_light(base_size = 18)

GUI

GUIベースでggplotやPlotlyの設定ができるのでとても便利. またはRStudioのアドインであるggThemeAssistを使えばよいらしい.

library(ggplotgui)
# Launch the ggplotgui Shiny app on mpg. The data set is passed positionally
# so the call does not depend on partial matching of `data` against the
# function's `dataset` parameter (NOTE(review): parameter name taken from the
# ggplotgui documentation - confirm against the installed version).
ggplot_shiny(mpg)
# A ggplot object can also be handed over to start Plotly.
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Violin plot of displ per class (outline coloured by class) with small,
# semi-transparent black jittered points; stored in `g` for ggplotly().
g <- ggplot(mpg, aes(x = class, y = displ, colour = class)) +
  theme_bw() +
  geom_violin() +
  geom_jitter(colour = "black", size = 1, alpha = 0.5, width = 0.25)

# Hand the ggplot object to plotly.
ggplotly(g)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

複数のグラフを並べる

  • gridExtra::grid.arrange
  • cowplot::plot_grid
  • ggpubr::ggarrange
library(ggpubr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
# First panel: displ vs cty.
g1 <- mpg %>%
  ggplot(aes(x = displ, y = cty)) +
  theme_classic() +
  geom_point(colour = "seagreen")

# Second panel: displ vs hwy.
g2 <- mpg %>%
  ggplot(aes(x = displ, y = hwy)) +
  theme_classic() +
  geom_point(colour = "lightskyblue")

# Place both panels side by side in two columns, each with its own label.
ggarrange(
  g1, g2,
  ncol = 2,
  labels = c("市街地", "高速道路"),
  hjust = -1.5
)

色に配慮する

library(ggthemes)
# Box plot of cty per class, filled using ggthemes' scale_fill_colorblind();
# the legend is requested explicitly.
mpg %>%
  ggplot(aes(x = class, y = cty, fill = class)) +
  geom_boxplot(show.legend = TRUE) +
  scale_fill_colorblind()